FILENAME: Analysis Notebook.ipynb
PROJECT: Multivariate Financial Forecasting
DATE CREATED: 24-APR-20
DATE UPDATED: 24-APR-20
TASK: Develop and implement a recurrent neural network
PURPOSE: Given a multivariate dataset, forecast and predict the corresponding response value for each record
INTENT: The purpose of this project is to conduct exploratory analysis of the provided data set and apply both supervised and unsupervised algorithms in order to extract meaningful information in support of future open source analysis. The project is broken down into two separate projects, with each project having four (4) distinct phases:
PROJECT: Randomized Budget Data
Environment Setup
Data ETL
Data Exploration
Model Development
Create random arrays to store the test values:
YEAR +5: yr5_forecast
YEAR +4: yr4_forecast
YEAR +3: yr3_forecast
YEAR +2: yr2_forecast
YEAR +1: yr1_forecast
YEAR +0: plan
YEAR -1: approp
YEAR -2: obligate
# Render the reference RNN architecture diagram inline in the notebook.
from IPython.core.display import HTML
from IPython.display import Image

Image(filename="data/rnn.png", width=750, height=750)
Import the required ML & neural net libraries
from scipy import stats
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.wrappers.scikit_learn import KerasRegressor
def init_array(df_length):
    '''
    DESCRIPTION: Build and return a DataFrame of randomized budget test data.

    Each record simulates one budget line item flowing through the planning
    pipeline: a year-5 preparation estimate is drawn at random, then each
    subsequent stage is the previous stage scaled by a random factor whose
    spread narrows as the budget year approaches execution.

    PARAMETERS:
        df_length (int): number of records (rows) to generate.

    RETURNS:
        pandas.DataFrame with columns
        ['yr5_prep', 'yr4_forecast', 'yr3_plan', 'yr2_approp',
         'yr1_oblig', 'yr0_exe'].

    BUG FIX: the original body referenced undefined names (yr5_prep,
    yr3_plan, yr2_approp, yr1_oblig were used before assignment, raising
    NameError) and created the DataFrame with column names that did not
    match the columns subsequently assigned. The variable and column names
    now form a single consistent chain matching the downstream consumers
    (dataset.yr5_prep ... dataset.yr0_exe).
    '''
    # Year +5 preparation estimate: random whole-dollar amounts.
    yr5_prep = np.random.randint(low=100000, high=30000000, size=df_length)
    # Each later stage scales the previous one by a random factor; the
    # factor range tightens as the projection nears execution.
    yr4_forecast = np.round(yr5_prep * np.random.uniform(low=0.5, high=1.5, size=df_length), 2)
    yr3_plan = np.round(yr4_forecast * np.random.uniform(low=0.6, high=1.4, size=df_length), 2)
    yr2_approp = np.round(yr3_plan * np.random.uniform(low=0.7, high=1.3, size=df_length), 2)
    yr1_oblig = np.round(yr2_approp * np.random.uniform(low=0.8, high=1.2, size=df_length), 2)
    yr0_exe = np.round(yr1_oblig * np.random.uniform(low=0.6, high=1.3, size=df_length), 2)
    # Assemble the columns under the names the rest of the notebook uses.
    raw_df = pd.DataFrame({
        'yr5_prep': yr5_prep,
        'yr4_forecast': yr4_forecast,
        'yr3_plan': yr3_plan,
        'yr2_approp': yr2_approp,
        'yr1_oblig': yr1_oblig,
        'yr0_exe': yr0_exe,
    })
    return raw_df
Create random arrays to store the test values:
Create the training array
# Generate the full randomized training set (10,000 records) and
# spot-check the final ten rows.
train_df = init_array(10000)
train_df.tail(10)
| yr5_prep | yr4_forecast | yr3_plan | yr2_approp | yr1_oblig | yr0_exe | |
|---|---|---|---|---|---|---|
| 9990 | 18026593 | 11719793.83 | 10252460.42 | 8548599.56 | 6843037.18 | 6584383.63 |
| 9991 | 16001938 | 18238303.28 | 17019703.09 | 12351064.29 | 10384124.33 | 10974971.86 |
| 9992 | 7665685 | 8701791.76 | 10944103.73 | 10263301.08 | 8306113.46 | 7636624.72 |
| 9993 | 8882983 | 13124867.66 | 8264207.94 | 8899896.88 | 8610564.29 | 7933271.04 |
| 9994 | 16116612 | 10784299.52 | 13975995.48 | 15237893.07 | 13159988.67 | 13665968.25 |
| 9995 | 11482868 | 6095959.92 | 5435329.00 | 5421774.73 | 4761872.43 | 4965473.12 |
| 9996 | 10531133 | 15547524.15 | 19854138.15 | 17825747.58 | 18512099.67 | 20026330.19 |
| 9997 | 10506494 | 8075742.33 | 9269229.45 | 8824067.22 | 8616674.83 | 7811565.41 |
| 9998 | 983060 | 644635.24 | 897246.02 | 1134034.52 | 959303.88 | 1041286.18 |
| 9999 | 20317213 | 12373289.72 | 14266448.62 | 16684225.58 | 17909736.38 | 16840201.48 |
# Capture the frame's column names for later reference.
col_list = [column for column in train_df.columns]
col_list
['yr5_prep', 'yr4_forecast', 'yr3_plan', 'yr2_approp', 'yr1_oblig', 'yr0_exe']
# Work on an independent copy so transformations leave train_df untouched.
dataset = train_df.copy()
dataset.tail(10)
| yr5_prep | yr4_forecast | yr3_plan | yr2_approp | yr1_oblig | yr0_exe | |
|---|---|---|---|---|---|---|
| 9990 | 18026593 | 11719793.83 | 10252460.42 | 8548599.56 | 6843037.18 | 6584383.63 |
| 9991 | 16001938 | 18238303.28 | 17019703.09 | 12351064.29 | 10384124.33 | 10974971.86 |
| 9992 | 7665685 | 8701791.76 | 10944103.73 | 10263301.08 | 8306113.46 | 7636624.72 |
| 9993 | 8882983 | 13124867.66 | 8264207.94 | 8899896.88 | 8610564.29 | 7933271.04 |
| 9994 | 16116612 | 10784299.52 | 13975995.48 | 15237893.07 | 13159988.67 | 13665968.25 |
| 9995 | 11482868 | 6095959.92 | 5435329.00 | 5421774.73 | 4761872.43 | 4965473.12 |
| 9996 | 10531133 | 15547524.15 | 19854138.15 | 17825747.58 | 18512099.67 | 20026330.19 |
| 9997 | 10506494 | 8075742.33 | 9269229.45 | 8824067.22 | 8616674.83 | 7811565.41 |
| 9998 | 983060 | 644635.24 | 897246.02 | 1134034.52 | 959303.88 | 1041286.18 |
| 9999 | 20317213 | 12373289.72 | 14266448.62 | 16684225.58 | 17909736.38 | 16840201.48 |
import plotly.graph_objects as go
import numpy as np

np.random.seed(1)

# Pull each budget-stage column out of the dataset for plotting.
y5 = dataset.yr5_prep
y4 = dataset.yr4_forecast
y3 = dataset.yr3_plan
y2 = dataset.yr2_approp
y1 = dataset.yr1_oblig
y0 = dataset.yr0_exe

# Box plot of each stage's value distribution.
# BUG FIX: the trace labels were reversed relative to the data (y0 was
# labeled "yr5_prep", y5 was labeled "yr0_exe", etc.); each series is now
# paired with its own column name.
fig = go.Figure()
fig.add_trace(go.Box(x=y5, name="yr5_prep"))
fig.add_trace(go.Box(x=y4, name="yr4_forecast"))
fig.add_trace(go.Box(x=y3, name="yr3_plan"))
fig.add_trace(go.Box(x=y2, name="yr2_approp"))
fig.add_trace(go.Box(x=y1, name="yr1_oblig"))
fig.add_trace(go.Box(x=y0, name="yr0_exe"))
fig.show()
# BUG FIX: seaborn is never imported anywhere earlier in this notebook, so
# this cell raised NameError on a fresh kernel; import it locally.
import seaborn as sns

# Pairwise scatter matrix across all six budget stages to eyeball the
# stage-to-stage correlation structure.
budget_pair = train_df[['yr5_prep','yr4_forecast','yr3_plan','yr2_approp','yr1_oblig','yr0_exe']]
sns.set(style="ticks", color_codes=True)
sns.pairplot(budget_pair)
<seaborn.axisgrid.PairGrid at 0x1a47066250>
Convert dataframe to numpy arrays
# Split the frame: the first five stages are predictors, yr0_exe (column 5)
# is the response.
x = dataset.iloc[:, :5].to_numpy()
y = dataset.iloc[:, 5].to_numpy()
x
array([[20166699. , 10102709.39, 13694132.67, 15382231.73, 16363108.89],
[19076027. , 17222201.83, 10748031.49, 13096649.89, 12479453.47],
[ 8588777. , 4736913.01, 5555264.82, 4186791.11, 4604302.43],
...,
[10506494. , 8075742.33, 9269229.45, 8824067.22, 8616674.83],
[ 983060. , 644635.24, 897246.02, 1134034.52, 959303.88],
[20317213. , 12373289.72, 14266448.62, 16684225.58, 17909736.38]])
# Reshape the response into a single-feature column vector for the scaler.
y = y.reshape(-1, 1)
y
array([[17279478.78],
[12576967.76],
[ 4257302.59],
...,
[ 7811565.41],
[ 1041286.18],
[16840201.48]])
Scale the data from 0 -> 1
# Fit independent MinMax scalers to the predictors and the response, then
# map both onto the 0 -> 1 range.
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()
scaler_x.fit(x)
print(scaler_x)
xscale = scaler_x.transform(x)
scaler_y.fit(y)
print(scaler_y)
yscale = scaler_y.transform(y)
MinMaxScaler(copy=True, feature_range=(0, 1)) MinMaxScaler(copy=True, feature_range=(0, 1))
Segregate master data to 'train', 'test', 'split'
X_train, X_test, y_train, y_test = train_test_split(xscale, yscale)
Verify the array shape
X_train.shape
(7500, 5)
y_train is the response variable
y_train.shape
(7500, 1)
# Feed-forward regressor: 5 scaled inputs -> 10 ReLU -> 5 ReLU -> 1 linear.
# NOTE(review): despite the notebook's RNN framing, this is a dense MLP,
# not a recurrent network — confirm intent.
model = Sequential([
    Dense(10, input_dim=5, kernel_initializer='normal', activation='relu'),
    Dense(5, activation='relu'),
    Dense(1, activation='linear'),
])
model.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense (Dense) (None, 10) 60 _________________________________________________________________ dense_1 (Dense) (None, 5) 55 _________________________________________________________________ dense_2 (Dense) (None, 1) 6 ================================================================= Total params: 121 Trainable params: 121 Non-trainable params: 0 _________________________________________________________________
# Mean-squared-error objective with the Adam optimizer; track MSE and MAE.
model.compile(optimizer='adam', loss='mse', metrics=['mse', 'mae'])
# Train for 150 epochs in mini-batches of 50, reserving 20% of the
# training split for per-epoch validation.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=150,
    batch_size=50,
    verbose=1,
)
Train on 6000 samples, validate on 1500 samples Epoch 1/150 6000/6000 [==============================] - 0s 68us/sample - loss: 0.0211 - mse: 0.0211 - mae: 0.1023 - val_loss: 0.0082 - val_mse: 0.0082 - val_mae: 0.0735 Epoch 2/150 6000/6000 [==============================] - 0s 18us/sample - loss: 0.0057 - mse: 0.0057 - mae: 0.0583 - val_loss: 0.0035 - val_mse: 0.0035 - val_mae: 0.0444 Epoch 3/150 6000/6000 [==============================] - 0s 18us/sample - loss: 0.0027 - mse: 0.0027 - mae: 0.0364 - val_loss: 0.0021 - val_mse: 0.0021 - val_mae: 0.0316 Epoch 4/150 6000/6000 [==============================] - 0s 17us/sample - loss: 0.0019 - mse: 0.0019 - mae: 0.0301 - val_loss: 0.0016 - val_mse: 0.0016 - val_mae: 0.0281 Epoch 5/150 6000/6000 [==============================] - 0s 18us/sample - loss: 0.0015 - mse: 0.0015 - mae: 0.0269 - val_loss: 0.0013 - val_mse: 0.0013 - val_mae: 0.0249 Epoch 6/150 6000/6000 [==============================] - 0s 17us/sample - loss: 0.0012 - mse: 0.0012 - mae: 0.0237 - val_loss: 9.9717e-04 - val_mse: 9.9717e-04 - val_mae: 0.0218 Epoch 7/150 6000/6000 [==============================] - 0s 18us/sample - loss: 9.4911e-04 - mse: 9.4911e-04 - mae: 0.0210 - val_loss: 8.1985e-04 - val_mse: 8.1985e-04 - val_mae: 0.0198 Epoch 8/150 6000/6000 [==============================] - 0s 18us/sample - loss: 7.9287e-04 - mse: 7.9287e-04 - mae: 0.0192 - val_loss: 7.0595e-04 - val_mse: 7.0595e-04 - val_mae: 0.0185 Epoch 9/150 6000/6000 [==============================] - 0s 20us/sample - loss: 6.8363e-04 - mse: 6.8363e-04 - mae: 0.0179 - val_loss: 6.1725e-04 - val_mse: 6.1725e-04 - val_mae: 0.0172 Epoch 10/150 6000/6000 [==============================] - 0s 24us/sample - loss: 6.0131e-04 - mse: 6.0131e-04 - mae: 0.0168 - val_loss: 5.5389e-04 - val_mse: 5.5389e-04 - val_mae: 0.0163 Epoch 11/150 6000/6000 [==============================] - 0s 23us/sample - loss: 5.4621e-04 - mse: 5.4621e-04 - mae: 0.0161 - val_loss: 5.0805e-04 - val_mse: 5.0805e-04 - 
val_mae: 0.0158 Epoch 12/150 6000/6000 [==============================] - 0s 23us/sample - loss: 4.9313e-04 - mse: 4.9313e-04 - mae: 0.0152 - val_loss: 4.6022e-04 - val_mse: 4.6022e-04 - val_mae: 0.0147 Epoch 13/150 6000/6000 [==============================] - 0s 23us/sample - loss: 4.4892e-04 - mse: 4.4892e-04 - mae: 0.0145 - val_loss: 4.2333e-04 - val_mse: 4.2333e-04 - val_mae: 0.0142 Epoch 14/150 6000/6000 [==============================] - 0s 22us/sample - loss: 4.1312e-04 - mse: 4.1312e-04 - mae: 0.0138 - val_loss: 4.1335e-04 - val_mse: 4.1335e-04 - val_mae: 0.0137 Epoch 15/150 6000/6000 [==============================] - 0s 23us/sample - loss: 3.9215e-04 - mse: 3.9215e-04 - mae: 0.0134 - val_loss: 3.8071e-04 - val_mse: 3.8071e-04 - val_mae: 0.0135 Epoch 16/150 6000/6000 [==============================] - 0s 25us/sample - loss: 3.6233e-04 - mse: 3.6233e-04 - mae: 0.0128 - val_loss: 3.4913e-04 - val_mse: 3.4913e-04 - val_mae: 0.0127 Epoch 17/150 6000/6000 [==============================] - 0s 19us/sample - loss: 3.4772e-04 - mse: 3.4772e-04 - mae: 0.0126 - val_loss: 3.5714e-04 - val_mse: 3.5714e-04 - val_mae: 0.0133 Epoch 18/150 6000/6000 [==============================] - 0s 19us/sample - loss: 3.2836e-04 - mse: 3.2836e-04 - mae: 0.0122 - val_loss: 3.1901e-04 - val_mse: 3.1901e-04 - val_mae: 0.0120 Epoch 19/150 6000/6000 [==============================] - 0s 18us/sample - loss: 3.1338e-04 - mse: 3.1338e-04 - mae: 0.0119 - val_loss: 3.0683e-04 - val_mse: 3.0683e-04 - val_mae: 0.0119 Epoch 20/150 6000/6000 [==============================] - 0s 19us/sample - loss: 3.0091e-04 - mse: 3.0091e-04 - mae: 0.0116 - val_loss: 2.9699e-04 - val_mse: 2.9699e-04 - val_mae: 0.0116 Epoch 21/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.8917e-04 - mse: 2.8917e-04 - mae: 0.0114 - val_loss: 2.8359e-04 - val_mse: 2.8359e-04 - val_mae: 0.0114 Epoch 22/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.8194e-04 - mse: 2.8194e-04 - 
mae: 0.0114 - val_loss: 3.0012e-04 - val_mse: 3.0012e-04 - val_mae: 0.0117 Epoch 23/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.7462e-04 - mse: 2.7462e-04 - mae: 0.0113 - val_loss: 2.6641e-04 - val_mse: 2.6641e-04 - val_mae: 0.0113 Epoch 24/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.6543e-04 - mse: 2.6543e-04 - mae: 0.0110 - val_loss: 2.6311e-04 - val_mse: 2.6311e-04 - val_mae: 0.0111 Epoch 25/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.5967e-04 - mse: 2.5967e-04 - mae: 0.0110 - val_loss: 2.6801e-04 - val_mse: 2.6801e-04 - val_mae: 0.0111 Epoch 26/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.5436e-04 - mse: 2.5436e-04 - mae: 0.0108 - val_loss: 2.4557e-04 - val_mse: 2.4557e-04 - val_mae: 0.0108 Epoch 27/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.4999e-04 - mse: 2.4999e-04 - mae: 0.0108 - val_loss: 2.4405e-04 - val_mse: 2.4405e-04 - val_mae: 0.0110 Epoch 28/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.4245e-04 - mse: 2.4245e-04 - mae: 0.0107 - val_loss: 2.3689e-04 - val_mse: 2.3689e-04 - val_mae: 0.0106 Epoch 29/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.3582e-04 - mse: 2.3582e-04 - mae: 0.0105 - val_loss: 2.3697e-04 - val_mse: 2.3697e-04 - val_mae: 0.0107 Epoch 30/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.3901e-04 - mse: 2.3901e-04 - mae: 0.0106 - val_loss: 2.3478e-04 - val_mse: 2.3478e-04 - val_mae: 0.0106 Epoch 31/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.2834e-04 - mse: 2.2834e-04 - mae: 0.0104 - val_loss: 2.5569e-04 - val_mse: 2.5569e-04 - val_mae: 0.0110 Epoch 32/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.2468e-04 - mse: 2.2468e-04 - mae: 0.0103 - val_loss: 2.2606e-04 - val_mse: 2.2606e-04 - val_mae: 0.0106 Epoch 33/150 6000/6000 
[==============================] - 0s 19us/sample - loss: 2.2869e-04 - mse: 2.2869e-04 - mae: 0.0104 - val_loss: 2.2576e-04 - val_mse: 2.2576e-04 - val_mae: 0.0106 Epoch 34/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.2003e-04 - mse: 2.2003e-04 - mae: 0.0102 - val_loss: 2.2317e-04 - val_mse: 2.2317e-04 - val_mae: 0.0104 Epoch 35/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.1810e-04 - mse: 2.1810e-04 - mae: 0.0101 - val_loss: 2.2258e-04 - val_mse: 2.2258e-04 - val_mae: 0.0104 Epoch 36/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.1833e-04 - mse: 2.1833e-04 - mae: 0.0102 - val_loss: 2.2071e-04 - val_mse: 2.2071e-04 - val_mae: 0.0104 Epoch 37/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.1808e-04 - mse: 2.1808e-04 - mae: 0.0103 - val_loss: 2.2272e-04 - val_mse: 2.2272e-04 - val_mae: 0.0103 Epoch 38/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.1179e-04 - mse: 2.1179e-04 - mae: 0.0101 - val_loss: 2.1593e-04 - val_mse: 2.1593e-04 - val_mae: 0.0102 Epoch 39/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.1957e-04 - mse: 2.1957e-04 - mae: 0.0103 - val_loss: 2.1181e-04 - val_mse: 2.1181e-04 - val_mae: 0.0103 Epoch 40/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.1209e-04 - mse: 2.1209e-04 - mae: 0.0101 - val_loss: 2.4627e-04 - val_mse: 2.4627e-04 - val_mae: 0.0113 Epoch 41/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.1937e-04 - mse: 2.1937e-04 - mae: 0.0104 - val_loss: 2.2676e-04 - val_mse: 2.2676e-04 - val_mae: 0.0104 Epoch 42/150 6000/6000 [==============================] - 0s 21us/sample - loss: 2.1169e-04 - mse: 2.1169e-04 - mae: 0.0101 - val_loss: 2.3090e-04 - val_mse: 2.3090e-04 - val_mae: 0.0107 Epoch 43/150 6000/6000 [==============================] - 0s 24us/sample - loss: 2.1398e-04 - mse: 2.1398e-04 - mae: 0.0102 - val_loss: 2.1511e-04 - 
val_mse: 2.1511e-04 - val_mae: 0.0102 Epoch 44/150 6000/6000 [==============================] - 0s 20us/sample - loss: 2.1303e-04 - mse: 2.1303e-04 - mae: 0.0102 - val_loss: 2.2474e-04 - val_mse: 2.2474e-04 - val_mae: 0.0106 Epoch 45/150 6000/6000 [==============================] - 0s 25us/sample - loss: 2.1014e-04 - mse: 2.1014e-04 - mae: 0.0101 - val_loss: 2.1104e-04 - val_mse: 2.1104e-04 - val_mae: 0.0104 Epoch 46/150 6000/6000 [==============================] - 0s 26us/sample - loss: 2.0544e-04 - mse: 2.0544e-04 - mae: 0.0099 - val_loss: 2.0892e-04 - val_mse: 2.0892e-04 - val_mae: 0.0103 Epoch 47/150 6000/6000 [==============================] - 0s 20us/sample - loss: 2.0935e-04 - mse: 2.0935e-04 - mae: 0.0101 - val_loss: 2.1317e-04 - val_mse: 2.1317e-04 - val_mae: 0.0105 Epoch 48/150 6000/6000 [==============================] - 0s 17us/sample - loss: 2.0776e-04 - mse: 2.0776e-04 - mae: 0.0100 - val_loss: 2.1368e-04 - val_mse: 2.1368e-04 - val_mae: 0.0105 Epoch 49/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.1132e-04 - mse: 2.1132e-04 - mae: 0.0101 - val_loss: 2.1818e-04 - val_mse: 2.1818e-04 - val_mae: 0.0106 Epoch 50/150 6000/6000 [==============================] - 0s 20us/sample - loss: 2.0981e-04 - mse: 2.0981e-04 - mae: 0.0101 - val_loss: 2.0862e-04 - val_mse: 2.0862e-04 - val_mae: 0.0101 Epoch 51/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0787e-04 - mse: 2.0787e-04 - mae: 0.0100 - val_loss: 2.1155e-04 - val_mse: 2.1155e-04 - val_mae: 0.0101 Epoch 52/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0618e-04 - mse: 2.0618e-04 - mae: 0.0100 - val_loss: 2.0669e-04 - val_mse: 2.0669e-04 - val_mae: 0.0100 Epoch 53/150 6000/6000 [==============================] - 0s 21us/sample - loss: 2.1114e-04 - mse: 2.1114e-04 - mae: 0.0101 - val_loss: 2.1414e-04 - val_mse: 2.1414e-04 - val_mae: 0.0102 Epoch 54/150 6000/6000 [==============================] - 0s 24us/sample - loss: 2.1053e-04 
- mse: 2.1053e-04 - mae: 0.0101 - val_loss: 2.0972e-04 - val_mse: 2.0972e-04 - val_mae: 0.0102 Epoch 55/150 6000/6000 [==============================] - 0s 20us/sample - loss: 2.0661e-04 - mse: 2.0661e-04 - mae: 0.0100 - val_loss: 2.1454e-04 - val_mse: 2.1454e-04 - val_mae: 0.0103 Epoch 56/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.1004e-04 - mse: 2.1004e-04 - mae: 0.0101 - val_loss: 2.0719e-04 - val_mse: 2.0719e-04 - val_mae: 0.0102 Epoch 57/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0832e-04 - mse: 2.0832e-04 - mae: 0.0101 - val_loss: 2.2096e-04 - val_mse: 2.2096e-04 - val_mae: 0.0104 Epoch 58/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0746e-04 - mse: 2.0746e-04 - mae: 0.0100 - val_loss: 2.0859e-04 - val_mse: 2.0859e-04 - val_mae: 0.0101 Epoch 59/150 6000/6000 [==============================] - 0s 20us/sample - loss: 2.2035e-04 - mse: 2.2035e-04 - mae: 0.0104 - val_loss: 2.0621e-04 - val_mse: 2.0621e-04 - val_mae: 0.0100 Epoch 60/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0978e-04 - mse: 2.0978e-04 - mae: 0.0102 - val_loss: 2.0770e-04 - val_mse: 2.0770e-04 - val_mae: 0.0102 Epoch 61/150 6000/6000 [==============================] - 0s 17us/sample - loss: 2.0611e-04 - mse: 2.0611e-04 - mae: 0.0100 - val_loss: 2.1146e-04 - val_mse: 2.1146e-04 - val_mae: 0.0102 Epoch 62/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.1697e-04 - mse: 2.1697e-04 - mae: 0.0103 - val_loss: 2.1021e-04 - val_mse: 2.1021e-04 - val_mae: 0.0102 Epoch 63/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0780e-04 - mse: 2.0780e-04 - mae: 0.0101 - val_loss: 2.0484e-04 - val_mse: 2.0484e-04 - val_mae: 0.0100 Epoch 64/150 6000/6000 [==============================] - 0s 17us/sample - loss: 2.0865e-04 - mse: 2.0865e-04 - mae: 0.0100 - val_loss: 2.0669e-04 - val_mse: 2.0669e-04 - val_mae: 0.0101 Epoch 65/150 6000/6000 
[==============================] - 0s 16us/sample - loss: 2.0439e-04 - mse: 2.0439e-04 - mae: 0.0099 - val_loss: 2.0653e-04 - val_mse: 2.0653e-04 - val_mae: 0.0101 Epoch 66/150 6000/6000 [==============================] - 0s 17us/sample - loss: 2.0654e-04 - mse: 2.0654e-04 - mae: 0.0100 - val_loss: 2.4025e-04 - val_mse: 2.4025e-04 - val_mae: 0.0108 Epoch 67/150 6000/6000 [==============================] - 0s 17us/sample - loss: 2.0640e-04 - mse: 2.0640e-04 - mae: 0.0100 - val_loss: 2.0554e-04 - val_mse: 2.0554e-04 - val_mae: 0.0100 Epoch 68/150 6000/6000 [==============================] - 0s 22us/sample - loss: 2.0314e-04 - mse: 2.0314e-04 - mae: 0.0099 - val_loss: 2.0603e-04 - val_mse: 2.0603e-04 - val_mae: 0.0101 Epoch 69/150 6000/6000 [==============================] - 0s 21us/sample - loss: 2.0521e-04 - mse: 2.0521e-04 - mae: 0.0099 - val_loss: 2.0842e-04 - val_mse: 2.0842e-04 - val_mae: 0.0101 Epoch 70/150 6000/6000 [==============================] - 0s 22us/sample - loss: 2.0609e-04 - mse: 2.0609e-04 - mae: 0.0101 - val_loss: 2.4870e-04 - val_mse: 2.4870e-04 - val_mae: 0.0113 Epoch 71/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.1514e-04 - mse: 2.1514e-04 - mae: 0.0103 - val_loss: 2.1257e-04 - val_mse: 2.1257e-04 - val_mae: 0.0104 Epoch 72/150 6000/6000 [==============================] - 0s 17us/sample - loss: 2.1151e-04 - mse: 2.1151e-04 - mae: 0.0102 - val_loss: 2.0492e-04 - val_mse: 2.0492e-04 - val_mae: 0.0100 Epoch 73/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0748e-04 - mse: 2.0748e-04 - mae: 0.0101 - val_loss: 2.1348e-04 - val_mse: 2.1348e-04 - val_mae: 0.0106 Epoch 74/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0494e-04 - mse: 2.0494e-04 - mae: 0.0100 - val_loss: 2.0796e-04 - val_mse: 2.0796e-04 - val_mae: 0.0101 Epoch 75/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0722e-04 - mse: 2.0722e-04 - mae: 0.0101 - val_loss: 2.2845e-04 - 
val_mse: 2.2845e-04 - val_mae: 0.0107 Epoch 76/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0715e-04 - mse: 2.0715e-04 - mae: 0.0100 - val_loss: 2.0573e-04 - val_mse: 2.0573e-04 - val_mae: 0.0101 Epoch 77/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0592e-04 - mse: 2.0592e-04 - mae: 0.0100 - val_loss: 2.1982e-04 - val_mse: 2.1982e-04 - val_mae: 0.0106 Epoch 78/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0703e-04 - mse: 2.0703e-04 - mae: 0.0101 - val_loss: 2.1589e-04 - val_mse: 2.1589e-04 - val_mae: 0.0103 Epoch 79/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.1072e-04 - mse: 2.1072e-04 - mae: 0.0101 - val_loss: 2.0914e-04 - val_mse: 2.0914e-04 - val_mae: 0.0102 Epoch 80/150 6000/6000 [==============================] - 0s 20us/sample - loss: 2.0939e-04 - mse: 2.0939e-04 - mae: 0.0101 - val_loss: 2.0498e-04 - val_mse: 2.0498e-04 - val_mae: 0.0101 Epoch 81/150 6000/6000 [==============================] - 0s 21us/sample - loss: 2.0651e-04 - mse: 2.0651e-04 - mae: 0.0100 - val_loss: 2.0773e-04 - val_mse: 2.0773e-04 - val_mae: 0.0102 Epoch 82/150 6000/6000 [==============================] - 0s 21us/sample - loss: 2.0912e-04 - mse: 2.0912e-04 - mae: 0.0101 - val_loss: 2.3717e-04 - val_mse: 2.3717e-04 - val_mae: 0.0108 Epoch 83/150 6000/6000 [==============================] - 0s 20us/sample - loss: 2.1648e-04 - mse: 2.1648e-04 - mae: 0.0103 - val_loss: 2.2496e-04 - val_mse: 2.2496e-04 - val_mae: 0.0103 Epoch 84/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0519e-04 - mse: 2.0519e-04 - mae: 0.0100 - val_loss: 2.2894e-04 - val_mse: 2.2894e-04 - val_mae: 0.0112 Epoch 85/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0997e-04 - mse: 2.0997e-04 - mae: 0.0101 - val_loss: 2.0544e-04 - val_mse: 2.0544e-04 - val_mae: 0.0101 Epoch 86/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0903e-04 
- mse: 2.0903e-04 - mae: 0.0101 - val_loss: 2.2226e-04 - val_mse: 2.2226e-04 - val_mae: 0.0107 Epoch 87/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0845e-04 - mse: 2.0845e-04 - mae: 0.0101 - val_loss: 2.1076e-04 - val_mse: 2.1076e-04 - val_mae: 0.0101 Epoch 88/150 6000/6000 [==============================] - 0s 20us/sample - loss: 2.0752e-04 - mse: 2.0752e-04 - mae: 0.0100 - val_loss: 2.1037e-04 - val_mse: 2.1037e-04 - val_mae: 0.0103 Epoch 89/150 6000/6000 [==============================] - 0s 20us/sample - loss: 2.0909e-04 - mse: 2.0909e-04 - mae: 0.0102 - val_loss: 2.0547e-04 - val_mse: 2.0547e-04 - val_mae: 0.0101 Epoch 90/150 6000/6000 [==============================] - 0s 23us/sample - loss: 2.0938e-04 - mse: 2.0938e-04 - mae: 0.0101 - val_loss: 2.0958e-04 - val_mse: 2.0958e-04 - val_mae: 0.0103 Epoch 91/150 6000/6000 [==============================] - 0s 25us/sample - loss: 2.0941e-04 - mse: 2.0941e-04 - mae: 0.0101 - val_loss: 2.0413e-04 - val_mse: 2.0413e-04 - val_mae: 0.0100 Epoch 92/150 6000/6000 [==============================] - 0s 31us/sample - loss: 2.1153e-04 - mse: 2.1153e-04 - mae: 0.0102 - val_loss: 2.0724e-04 - val_mse: 2.0724e-04 - val_mae: 0.0101 Epoch 93/150 6000/6000 [==============================] - 0s 21us/sample - loss: 2.0963e-04 - mse: 2.0963e-04 - mae: 0.0101 - val_loss: 2.1481e-04 - val_mse: 2.1481e-04 - val_mae: 0.0103 Epoch 94/150 6000/6000 [==============================] - 0s 22us/sample - loss: 2.0664e-04 - mse: 2.0664e-04 - mae: 0.0100 - val_loss: 2.0663e-04 - val_mse: 2.0663e-04 - val_mae: 0.0101 Epoch 95/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0648e-04 - mse: 2.0648e-04 - mae: 0.0100 - val_loss: 2.1953e-04 - val_mse: 2.1953e-04 - val_mae: 0.0104 Epoch 96/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0999e-04 - mse: 2.0999e-04 - mae: 0.0101 - val_loss: 2.1595e-04 - val_mse: 2.1595e-04 - val_mae: 0.0102 Epoch 97/150 6000/6000 
[==============================] - 0s 18us/sample - loss: 2.0396e-04 - mse: 2.0396e-04 - mae: 0.0099 - val_loss: 2.1427e-04 - val_mse: 2.1427e-04 - val_mae: 0.0102 Epoch 98/150 6000/6000 [==============================] - 0s 25us/sample - loss: 2.1184e-04 - mse: 2.1184e-04 - mae: 0.0102 - val_loss: 2.0756e-04 - val_mse: 2.0756e-04 - val_mae: 0.0102 Epoch 99/150 6000/6000 [==============================] - 0s 26us/sample - loss: 2.1003e-04 - mse: 2.1003e-04 - mae: 0.0101 - val_loss: 2.0564e-04 - val_mse: 2.0564e-04 - val_mae: 0.0101 Epoch 100/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.1012e-04 - mse: 2.1012e-04 - mae: 0.0101 - val_loss: 2.0420e-04 - val_mse: 2.0420e-04 - val_mae: 0.0100 Epoch 101/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0987e-04 - mse: 2.0987e-04 - mae: 0.0102 - val_loss: 2.1625e-04 - val_mse: 2.1625e-04 - val_mae: 0.0103 Epoch 102/150 6000/6000 [==============================] - 0s 22us/sample - loss: 2.0495e-04 - mse: 2.0495e-04 - mae: 0.0100 - val_loss: 2.0678e-04 - val_mse: 2.0678e-04 - val_mae: 0.0101 Epoch 103/150 6000/6000 [==============================] - 0s 28us/sample - loss: 2.0741e-04 - mse: 2.0741e-04 - mae: 0.0100 - val_loss: 2.0426e-04 - val_mse: 2.0426e-04 - val_mae: 0.0100 Epoch 104/150 6000/6000 [==============================] - 0s 37us/sample - loss: 2.0871e-04 - mse: 2.0871e-04 - mae: 0.0101 - val_loss: 2.1680e-04 - val_mse: 2.1680e-04 - val_mae: 0.0102 Epoch 105/150 6000/6000 [==============================] - 0s 35us/sample - loss: 2.0624e-04 - mse: 2.0624e-04 - mae: 0.0100 - val_loss: 2.2684e-04 - val_mse: 2.2684e-04 - val_mae: 0.0104 Epoch 106/150 6000/6000 [==============================] - 0s 20us/sample - loss: 2.0798e-04 - mse: 2.0798e-04 - mae: 0.0101 - val_loss: 2.0569e-04 - val_mse: 2.0569e-04 - val_mae: 0.0100 Epoch 107/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.1452e-04 - mse: 2.1452e-04 - mae: 0.0103 - val_loss: 
2.3176e-04 - val_mse: 2.3176e-04 - val_mae: 0.0112 Epoch 108/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0857e-04 - mse: 2.0857e-04 - mae: 0.0100 - val_loss: 2.0492e-04 - val_mse: 2.0492e-04 - val_mae: 0.0100 Epoch 109/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0791e-04 - mse: 2.0791e-04 - mae: 0.0101 - val_loss: 2.0602e-04 - val_mse: 2.0602e-04 - val_mae: 0.0101 Epoch 110/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0825e-04 - mse: 2.0825e-04 - mae: 0.0101 - val_loss: 2.1078e-04 - val_mse: 2.1078e-04 - val_mae: 0.0104 Epoch 111/150 6000/6000 [==============================] - 0s 21us/sample - loss: 2.1184e-04 - mse: 2.1184e-04 - mae: 0.0102 - val_loss: 2.0797e-04 - val_mse: 2.0797e-04 - val_mae: 0.0101 Epoch 112/150 6000/6000 [==============================] - 0s 20us/sample - loss: 2.0773e-04 - mse: 2.0773e-04 - mae: 0.0100 - val_loss: 2.0497e-04 - val_mse: 2.0497e-04 - val_mae: 0.0100 Epoch 113/150 6000/6000 [==============================] - 0s 21us/sample - loss: 2.0828e-04 - mse: 2.0828e-04 - mae: 0.0100 - val_loss: 2.0404e-04 - val_mse: 2.0404e-04 - val_mae: 0.0100 Epoch 114/150 6000/6000 [==============================] - 0s 22us/sample - loss: 2.0335e-04 - mse: 2.0335e-04 - mae: 0.0099 - val_loss: 2.1987e-04 - val_mse: 2.1987e-04 - val_mae: 0.0102 Epoch 115/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.1294e-04 - mse: 2.1294e-04 - mae: 0.0102 - val_loss: 2.1309e-04 - val_mse: 2.1309e-04 - val_mae: 0.0103 Epoch 116/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0714e-04 - mse: 2.0714e-04 - mae: 0.0100 - val_loss: 2.0567e-04 - val_mse: 2.0567e-04 - val_mae: 0.0100 Epoch 117/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0871e-04 - mse: 2.0871e-04 - mae: 0.0100 - val_loss: 2.1152e-04 - val_mse: 2.1152e-04 - val_mae: 0.0101 Epoch 118/150 6000/6000 [==============================] - 0s 
17us/sample - loss: 2.0962e-04 - mse: 2.0962e-04 - mae: 0.0101 - val_loss: 2.4746e-04 - val_mse: 2.4746e-04 - val_mae: 0.0111 Epoch 119/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0867e-04 - mse: 2.0867e-04 - mae: 0.0101 - val_loss: 2.4072e-04 - val_mse: 2.4072e-04 - val_mae: 0.0113 Epoch 120/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.1451e-04 - mse: 2.1451e-04 - mae: 0.0103 - val_loss: 2.0754e-04 - val_mse: 2.0754e-04 - val_mae: 0.0102 Epoch 121/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0827e-04 - mse: 2.0827e-04 - mae: 0.0100 - val_loss: 2.0498e-04 - val_mse: 2.0498e-04 - val_mae: 0.0100 Epoch 122/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0502e-04 - mse: 2.0502e-04 - mae: 0.0099 - val_loss: 2.0897e-04 - val_mse: 2.0897e-04 - val_mae: 0.0103 Epoch 123/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0723e-04 - mse: 2.0723e-04 - mae: 0.0100 - val_loss: 2.0530e-04 - val_mse: 2.0530e-04 - val_mae: 0.0100 Epoch 124/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0570e-04 - mse: 2.0570e-04 - mae: 0.0100 - val_loss: 2.0510e-04 - val_mse: 2.0510e-04 - val_mae: 0.0101 Epoch 125/150 6000/6000 [==============================] - 0s 24us/sample - loss: 2.0827e-04 - mse: 2.0827e-04 - mae: 0.0101 - val_loss: 2.0821e-04 - val_mse: 2.0821e-04 - val_mae: 0.0101 Epoch 126/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0928e-04 - mse: 2.0928e-04 - mae: 0.0101 - val_loss: 2.1632e-04 - val_mse: 2.1632e-04 - val_mae: 0.0103 Epoch 127/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0812e-04 - mse: 2.0812e-04 - mae: 0.0101 - val_loss: 2.0579e-04 - val_mse: 2.0579e-04 - val_mae: 0.0100 Epoch 128/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0588e-04 - mse: 2.0588e-04 - mae: 0.0100 - val_loss: 2.1328e-04 - val_mse: 2.1328e-04 - val_mae: 
0.0104 Epoch 129/150 6000/6000 [==============================] - 0s 20us/sample - loss: 2.0532e-04 - mse: 2.0532e-04 - mae: 0.0100 - val_loss: 2.0419e-04 - val_mse: 2.0419e-04 - val_mae: 0.0100 Epoch 130/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0862e-04 - mse: 2.0862e-04 - mae: 0.0101 - val_loss: 2.8068e-04 - val_mse: 2.8068e-04 - val_mae: 0.0120 Epoch 131/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.1307e-04 - mse: 2.1307e-04 - mae: 0.0102 - val_loss: 2.0779e-04 - val_mse: 2.0779e-04 - val_mae: 0.0101 Epoch 132/150 6000/6000 [==============================] - 0s 22us/sample - loss: 2.0460e-04 - mse: 2.0460e-04 - mae: 0.0099 - val_loss: 2.0596e-04 - val_mse: 2.0596e-04 - val_mae: 0.0101 Epoch 133/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0707e-04 - mse: 2.0707e-04 - mae: 0.0100 - val_loss: 2.0930e-04 - val_mse: 2.0930e-04 - val_mae: 0.0103 Epoch 134/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.1055e-04 - mse: 2.1055e-04 - mae: 0.0101 - val_loss: 2.0878e-04 - val_mse: 2.0878e-04 - val_mae: 0.0101 Epoch 135/150 6000/6000 [==============================] - 0s 17us/sample - loss: 2.0478e-04 - mse: 2.0478e-04 - mae: 0.0100 - val_loss: 2.2692e-04 - val_mse: 2.2692e-04 - val_mae: 0.0105 Epoch 136/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0984e-04 - mse: 2.0984e-04 - mae: 0.0101 - val_loss: 2.3142e-04 - val_mse: 2.3142e-04 - val_mae: 0.0105 Epoch 137/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0541e-04 - mse: 2.0541e-04 - mae: 0.0100 - val_loss: 2.0881e-04 - val_mse: 2.0881e-04 - val_mae: 0.0102 Epoch 138/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.1356e-04 - mse: 2.1356e-04 - mae: 0.0102 - val_loss: 2.5252e-04 - val_mse: 2.5252e-04 - val_mae: 0.0112 Epoch 139/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0599e-04 - mse: 2.0599e-04 - 
mae: 0.0100 - val_loss: 2.2998e-04 - val_mse: 2.2998e-04 - val_mae: 0.0106 Epoch 140/150 6000/6000 [==============================] - 0s 17us/sample - loss: 2.1196e-04 - mse: 2.1196e-04 - mae: 0.0102 - val_loss: 2.4075e-04 - val_mse: 2.4075e-04 - val_mae: 0.0113 Epoch 141/150 6000/6000 [==============================] - 0s 20us/sample - loss: 2.0741e-04 - mse: 2.0741e-04 - mae: 0.0100 - val_loss: 2.0399e-04 - val_mse: 2.0399e-04 - val_mae: 0.0100 Epoch 142/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0851e-04 - mse: 2.0851e-04 - mae: 0.0101 - val_loss: 2.2038e-04 - val_mse: 2.2038e-04 - val_mae: 0.0104 Epoch 143/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0664e-04 - mse: 2.0664e-04 - mae: 0.0100 - val_loss: 2.1827e-04 - val_mse: 2.1827e-04 - val_mae: 0.0104 Epoch 144/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.1112e-04 - mse: 2.1112e-04 - mae: 0.0101 - val_loss: 2.0971e-04 - val_mse: 2.0971e-04 - val_mae: 0.0101 Epoch 145/150 6000/6000 [==============================] - 0s 18us/sample - loss: 2.0970e-04 - mse: 2.0970e-04 - mae: 0.0101 - val_loss: 2.1187e-04 - val_mse: 2.1187e-04 - val_mae: 0.0101 Epoch 146/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0712e-04 - mse: 2.0712e-04 - mae: 0.0100 - val_loss: 2.0950e-04 - val_mse: 2.0950e-04 - val_mae: 0.0101 Epoch 147/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0411e-04 - mse: 2.0411e-04 - mae: 0.0099 - val_loss: 2.0427e-04 - val_mse: 2.0427e-04 - val_mae: 0.0101 Epoch 148/150 6000/6000 [==============================] - 0s 19us/sample - loss: 2.0920e-04 - mse: 2.0920e-04 - mae: 0.0101 - val_loss: 2.2558e-04 - val_mse: 2.2558e-04 - val_mae: 0.0106 Epoch 149/150 6000/6000 [==============================] - 0s 17us/sample - loss: 2.0716e-04 - mse: 2.0716e-04 - mae: 0.0100 - val_loss: 2.0976e-04 - val_mse: 2.0976e-04 - val_mae: 0.0101 Epoch 150/150 6000/6000 
[==============================] - 0s 18us/sample - loss: 2.0938e-04 - mse: 2.0938e-04 - mae: 0.0101 - val_loss: 2.0984e-04 - val_mse: 2.0984e-04 - val_mae: 0.0102
# Visualize training vs. validation loss per epoch so convergence and
# overfitting (validation curve diverging upward) are easy to spot.
loss_traces = [
    ('Train', history.history['loss'], 'lines'),
    ('Validation', history.history['val_loss'], 'lines+markers'),
]
fig = go.Figure()
for trace_name, series, trace_mode in loss_traces:
    fig.add_trace(go.Scatter(y=series, mode=trace_mode, name=trace_name))
fig.update_layout(
    autosize=False,
    width=1500,
    height=750,
    title="Train vs. Validation Loss Test",
    xaxis=dict(title_text="No. of epochs", titlefont=dict(size=20)),
    yaxis=dict(title_text="Loss Value", titlefont=dict(size=20)),
)
fig.show()
Create a new array with dummy data and test the model's effectiveness against it
# Generate a fresh 25,000-record dummy dataset, then keep every column
# except the last one (the response) as the model's validation features.
predict_full = init_array(25000)
valid_df = predict_full.drop(columns=predict_full.columns[-1])
valid_df.tail(100)
| yr5_prep | yr4_forecast | yr3_plan | yr2_approp | yr1_oblig | |
|---|---|---|---|---|---|
| 24900 | 10263133 | 12085183.64 | 11770593.84 | 12345244.27 | 12144695.28 |
| 24901 | 15306374 | 15558297.88 | 10143313.48 | 12859491.09 | 12970167.63 |
| 24902 | 13227973 | 9850936.33 | 7338988.13 | 8977414.97 | 7267718.52 |
| 24903 | 20214368 | 26617934.29 | 19213559.69 | 18660269.50 | 20990016.41 |
| 24904 | 18235171 | 21579959.31 | 26568097.27 | 28212999.52 | 23262550.56 |
| ... | ... | ... | ... | ... | ... |
| 24995 | 9788547 | 6300426.47 | 7560977.45 | 7878784.88 | 6764996.38 |
| 24996 | 16761939 | 12330805.03 | 10389995.58 | 9104177.25 | 7615890.77 |
| 24997 | 29827735 | 42364350.78 | 40640610.43 | 48209623.84 | 52804780.69 |
| 24998 | 16921721 | 18158171.51 | 12144862.28 | 9957838.26 | 9597605.40 |
| 24999 | 23722133 | 14946131.09 | 15370144.23 | 15548594.29 | 12727921.96 |
100 rows × 5 columns
Convert the dataframe to a two dimensional numpy array
# Materialize the feature dataframe as a plain 2-D NumPy array, the input
# format the Keras model expects.
valid_array = np.asarray(valid_df)
valid_array
array([[ 4965740. , 3738887.84, 4867841.01, 5816143.56, 6621303.04],
[18101446. , 22730574.62, 30983838.52, 30839488.77, 27999287.96],
[ 7845234. , 6211919.99, 5210883.93, 4389377.84, 3755063.11],
...,
[29827735. , 42364350.78, 40640610.43, 48209623.84, 52804780.69],
[16921721. , 18158171.51, 12144862.28, 9957838.26, 9597605.4 ],
[23722133. , 14946131.09, 15370144.23, 15548594.29, 12727921.96]])
Validate the shape of the newly created array
valid_array.shape
(25000, 5)
# Run the trained Keras model over all 25,000 validation rows; the result
# (shown below) is an (n, 1) float32 array of predicted response values.
predict_val = model.predict(valid_array)
predict_val
array([[ 6415405. ],
[27905628. ],
[ 3900029.5],
...,
[50750996. ],
[ 9947363. ],
[13214944. ]], dtype=float32)
Merge the numpy predictor array as a standalone column to the predict_full dataframe
# Attach the model's predictions as a new 'predict_values' column so they
# can be compared row-by-row against the actual response ('yr0_exe').
# NOTE(review): predict_val is an (n, 1) array — pandas squeezes it into a
# single column here; verify this still holds if the pandas version changes.
predict_full['predict_values'] = predict_val
predict_full.tail(10)
| yr5_prep | yr4_forecast | yr3_plan | yr2_approp | yr1_oblig | yr0_exe | predict_values | |
|---|---|---|---|---|---|---|---|
| 24990 | 8666512 | 8243131.54 | 11250496.92 | 12206377.00 | 12144593.97 | 11714592.93 | 11976465.0 |
| 24991 | 24778892 | 13909981.82 | 8446511.62 | 6536875.09 | 6397384.96 | 6077246.06 | 6661327.5 |
| 24992 | 11527898 | 11208410.07 | 12353532.44 | 11700882.04 | 12725154.52 | 12818345.15 | 12532112.0 |
| 24993 | 4823458 | 4707668.91 | 4312906.57 | 4072630.44 | 4188341.56 | 3833207.37 | 4221225.5 |
| 24994 | 26754281 | 18898974.07 | 19330708.42 | 13554253.74 | 11233619.02 | 12059793.16 | 11689827.0 |
| 24995 | 9788547 | 6300426.47 | 7560977.45 | 7878784.88 | 6764996.38 | 7188160.59 | 7014779.5 |
| 24996 | 16761939 | 12330805.03 | 10389995.58 | 9104177.25 | 7615890.77 | 7192851.68 | 7912705.0 |
| 24997 | 29827735 | 42364350.78 | 40640610.43 | 48209623.84 | 52804780.69 | 55065747.11 | 50750996.0 |
| 24998 | 16921721 | 18158171.51 | 12144862.28 | 9957838.26 | 9597605.40 | 9776096.89 | 9947363.0 |
| 24999 | 23722133 | 14946131.09 | 15370144.23 | 15548594.29 | 12727921.96 | 12903799.75 | 13214944.0 |
Calculate the relative difference between the actual ('yr0_exe') and predicted ('predict_values') model values and store it for each record in the 'delta' column
# Relative prediction error per record: (actual - predicted) / actual.
# Negative values mean the model over-predicted; values near 0 are good.
# NOTE(review): divides by 'yr0_exe' — the randomized data is generated well
# above zero, but confirm no zero responses before using on real data.
predict_full['delta'] = (predict_full['yr0_exe'] - predict_full['predict_values']) / predict_full['yr0_exe']
predict_full.tail(10)
| yr5_prep | yr4_forecast | yr3_plan | yr2_approp | yr1_oblig | yr0_exe | predict_values | delta | |
|---|---|---|---|---|---|---|---|---|
| 24990 | 8666512 | 8243131.54 | 11250496.92 | 12206377.00 | 12144593.97 | 11714592.93 | 11976465.0 | -0.022354 |
| 24991 | 24778892 | 13909981.82 | 8446511.62 | 6536875.09 | 6397384.96 | 6077246.06 | 6661327.5 | -0.096110 |
| 24992 | 11527898 | 11208410.07 | 12353532.44 | 11700882.04 | 12725154.52 | 12818345.15 | 12532112.0 | 0.022330 |
| 24993 | 4823458 | 4707668.91 | 4312906.57 | 4072630.44 | 4188341.56 | 3833207.37 | 4221225.5 | -0.101225 |
| 24994 | 26754281 | 18898974.07 | 19330708.42 | 13554253.74 | 11233619.02 | 12059793.16 | 11689827.0 | 0.030678 |
| 24995 | 9788547 | 6300426.47 | 7560977.45 | 7878784.88 | 6764996.38 | 7188160.59 | 7014779.5 | 0.024120 |
| 24996 | 16761939 | 12330805.03 | 10389995.58 | 9104177.25 | 7615890.77 | 7192851.68 | 7912705.0 | -0.100079 |
| 24997 | 29827735 | 42364350.78 | 40640610.43 | 48209623.84 | 52804780.69 | 55065747.11 | 50750996.0 | 0.078356 |
| 24998 | 16921721 | 18158171.51 | 12144862.28 | 9957838.26 | 9597605.40 | 9776096.89 | 9947363.0 | -0.017519 |
| 24999 | 23722133 | 14946131.09 | 15370144.23 | 15548594.29 | 12727921.96 | 12903799.75 | 13214944.0 | -0.024113 |
Display the histogram of the delta values (i.e. their distribution)
# Histogram (with a rug marginal) of the per-record relative prediction
# error 'delta'; a tight, symmetric distribution around 0 indicates an
# accurate, unbiased model.
fig = px.histogram(
    predict_full,
    x="delta",
    marginal="rug",  # marginal can also be `box` or `violin`
    hover_data=predict_full.columns,
    color_discrete_sequence=['indianred'],
    opacity=0.5,
)
fig.update_layout(
    autosize=True,
    # fix: original title misspelled "Historgram" and had a trailing space
    title="Actual vs Prediction value Histogram",
)
fig.show()
Display distribution of box & whisker plot for response and predict values
# Compare the distribution of the true response against the model's
# predictions with side-by-side horizontal box-and-whisker plots.
actual_vals = dataset.yr0_exe
predicted_vals = predict_full.predict_values
fig = go.Figure()
fig.add_trace(go.Box(x=actual_vals, name="yr0_exe"))
fig.add_trace(go.Box(x=predicted_vals, name="predict_values"))
fig.show()
Retrieve the statistical parameters for the linear model
# Fit an ordinary least-squares line of predicted vs. actual values and
# report its summary statistics.
x = predict_full['yr0_exe']
y = predict_full['predict_values']
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
# fix: scipy.stats.linregress returns the correlation coefficient r, NOT
# r-squared — square it so the printed "R-squared" label is accurate.
print(" Slope: {}\n Intercept: {}\n R-squared: {}\n P-Value: {}\n Standard Error: {}".format(
    slope, intercept, r_value ** 2, p_value, std_err))
Slope: 0.962922264545949 Intercept: 508926.6434964407 R-squared: 0.9945694919098771 P-Value: 0.0 Standard Error: 0.0006373064030624608
Plot the response values (original) against the predicted values
# Scatter the actual response against the model prediction with an OLS
# trendline; points hugging the diagonal indicate an accurate model.
fig = px.scatter(
    predict_full,
    x="yr0_exe",
    y="predict_values",
    trendline="ols",
    opacity=0.25,
    color_discrete_sequence=['green'],
)
fig.update_layout(
    autosize=False,
    width=1000,
    height=750,
    title="Response values vs predicted values scatterplot",
    xaxis=dict(title_text="yr0_exe values (Response Values)", titlefont=dict(size=20)),
    yaxis=dict(title_text="predict_values (Predicted Values)", titlefont=dict(size=20)),
)
fig.show()
# Row count of the prediction dataframe, used to size the random
# coordinate arrays below.
df_size = predict_full.shape[0]
df_size
25000
# Draw one uniform random (latitude, longitude) pair per record, bounded by
# a box spanning roughly the US incl. Alaska/Hawaii
# (lat 19.50139..64.85694, lon -161.75583..-68.01197).
lat_random = np.random.uniform(19.50139, 64.85694, df_size)
long_random = np.random.uniform(-161.75583, -68.01197, df_size)
len(long_random)
25000
# Build a geo-tagged copy of the prediction dataframe: same rows plus the
# synthetic 'latitude'/'longitude' columns (assign returns a new copy,
# leaving predict_full untouched).
geo_df = predict_full.assign(latitude=lat_random, longitude=long_random)
geo_df.tail(10)
| yr5_prep | yr4_forecast | yr3_plan | yr2_approp | yr1_oblig | yr0_exe | predict_values | delta | latitude | longitude | |
|---|---|---|---|---|---|---|---|---|---|---|
| 24990 | 8666512 | 8243131.54 | 11250496.92 | 12206377.00 | 12144593.97 | 11714592.93 | 11976465.0 | -0.022354 | 56.956680 | -117.828504 |
| 24991 | 24778892 | 13909981.82 | 8446511.62 | 6536875.09 | 6397384.96 | 6077246.06 | 6661327.5 | -0.096110 | 38.582160 | -158.420835 |
| 24992 | 11527898 | 11208410.07 | 12353532.44 | 11700882.04 | 12725154.52 | 12818345.15 | 12532112.0 | 0.022330 | 36.536841 | -120.499832 |
| 24993 | 4823458 | 4707668.91 | 4312906.57 | 4072630.44 | 4188341.56 | 3833207.37 | 4221225.5 | -0.101225 | 32.585896 | -139.306922 |
| 24994 | 26754281 | 18898974.07 | 19330708.42 | 13554253.74 | 11233619.02 | 12059793.16 | 11689827.0 | 0.030678 | 43.166910 | -157.715695 |
| 24995 | 9788547 | 6300426.47 | 7560977.45 | 7878784.88 | 6764996.38 | 7188160.59 | 7014779.5 | 0.024120 | 47.816693 | -99.858608 |
| 24996 | 16761939 | 12330805.03 | 10389995.58 | 9104177.25 | 7615890.77 | 7192851.68 | 7912705.0 | -0.100079 | 38.369400 | -136.769717 |
| 24997 | 29827735 | 42364350.78 | 40640610.43 | 48209623.84 | 52804780.69 | 55065747.11 | 50750996.0 | 0.078356 | 35.014881 | -74.650214 |
| 24998 | 16921721 | 18158171.51 | 12144862.28 | 9957838.26 | 9597605.40 | 9776096.89 | 9947363.0 | -0.017519 | 50.436161 | -121.712797 |
| 24999 | 23722133 | 14946131.09 | 15370144.23 | 15548594.29 | 12727921.96 | 12903799.75 | 13214944.0 | -0.024113 | 41.476404 | -89.167761 |
# Plot the first 50 geo-tagged records as lat/long points, with marker size
# proportional to the predicted budget value.
fig = px.scatter(
    geo_df[:50],
    x="longitude",
    y="latitude",
    opacity=0.25,
    size="predict_values",
    color_discrete_sequence=['green'],
)
fig.update_layout(
    autosize=False,
    width=1000,
    height=750,
    title="US Map of Budget points",
    xaxis=dict(title_text="Longitude", titlefont=dict(size=20)),
    yaxis=dict(title_text="Latitude", titlefont=dict(size=20)),
)
fig.show()
from urllib.request import urlopen
import json

# Download the FIPS-keyed US county GeoJSON that plotly's choropleth
# examples use; the context manager closes the connection automatically.
_COUNTIES_GEOJSON_URL = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
with urlopen(_COUNTIES_GEOJSON_URL) as response:
    counties = json.load(response)
# Report total notebook runtime. NOTE(review): assumes `t` is the time
# module and `program_start` was captured at the top of the notebook —
# confirm against the setup cell.
program_end = t.time() - program_start
elapsed = round(program_end, 2)
# idiom: f-string instead of str.format (identical output)
print(f"Total time for program execution is {elapsed} seconds")
Total time for program execution is 30.2 seconds
import descartes
# fix: original `import geopandas gpd` was a SyntaxError — the `as` keyword
# is required to bind the alias.
import geopandas as gpd
| country | continent | year | lifeExp | pop | gdpPercap | iso_alpha | iso_num | |
|---|---|---|---|---|---|---|---|---|
| 11 | Afghanistan | Asia | 2007 | 43.828 | 31889923 | 974.580338 | AFG | 4 |
| 23 | Albania | Europe | 2007 | 76.423 | 3600523 | 5937.029526 | ALB | 8 |
| 35 | Algeria | Africa | 2007 | 72.301 | 33333216 | 6223.367465 | DZA | 12 |
| 47 | Angola | Africa | 2007 | 42.731 | 12420476 | 4797.231267 | AGO | 24 |
| 59 | Argentina | Americas | 2007 | 75.320 | 40301927 | 12779.379640 | ARG | 32 |
| 71 | Australia | Oceania | 2007 | 81.235 | 20434176 | 34435.367440 | AUS | 36 |
| 83 | Austria | Europe | 2007 | 79.829 | 8199783 | 36126.492700 | AUT | 40 |
| 95 | Bahrain | Asia | 2007 | 75.635 | 708573 | 29796.048340 | BHR | 48 |
| 107 | Bangladesh | Asia | 2007 | 64.062 | 150448339 | 1391.253792 | BGD | 50 |
| 119 | Belgium | Europe | 2007 | 79.441 | 10392226 | 33692.605080 | BEL | 56 |